Presentado por: Ayelen Opazo y Catherine Cruz
library(RMySQL)
## Loading required package: DBI
library(DBI)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.1.3
## 
## Attaching package: 'RSQLite'
## The following object is masked from 'package:RMySQL':
## 
##     isIdCurrent
library(odbc)
#*library(RevoScaleR)¨*

sort(unique(odbcListDrivers()[[1]]))
## [1] "Microsoft Access Driver (*.mdb, *.accdb)"              
## [2] "Microsoft Access Text Driver (*.txt, *.csv)"           
## [3] "Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)"
## [4] "MySQL ODBC 8.0 ANSI Driver"                            
## [5] "MySQL ODBC 8.0 Unicode Driver"                         
## [6] "ODBC Driver 17 for SQL Server"                         
## [7] "SQL Server"                                            
## [8] "SQL Server Native Client 11.0"
sqldb=dbConnect(odbc(), 
                Driver="SQL Server",
                Server="157.92.26.17,1443;",
                Database="AdventureWorks2019",
                uid="Alumno", 
                pwd="mrcd2022")
Subtitle: "Exploración de la base y el negocio"
Oficinas_de_venta=dbGetQuery(sqldb, 
"SELECT [Group] as Zona
FROM Sales.SalesTerritory;")

barplot(prop.table(table(Oficinas_de_venta$Zona)),col=c(colors()[616]), main="Ventas por zona - General")

Categorías de productos
Categorías_de_productos=dbGetQuery(sqldb, 
"SELECT Name
FROM Production.ProductCategory")
Categorías_de_productos
Subcategorias de productos
ProductSubcategory=dbGetQuery(sqldb, 
"SELECT pc.Name as Category, ps.Name as Subcategory
FROM Production.ProductCategory pc
left join Production.ProductSubcategory ps on ps.ProductCategoryID=pc.ProductCategoryID")
ProductSubcategory
Productos más vendidos y sus categorías / subcategorías
Productosmasvendidos=dbGetQuery(sqldb, 
"SELECT p.Name AS Nombre_producto, s.Name AS Subcategoria, c.Name AS Categoria, COUNT(*) Cantidad_vendida_historica
FROM Sales.SalesOrderDetail d INNER JOIN Production.Product p ON d.ProductID=p.ProductID
INNER JOIN Production.ProductSubcategory s ON p.ProductSubcategoryID=s.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON s.ProductCategoryID=c.ProductCategoryID
GROUP BY d.ProductID, p.Name, s.Name, c.Name
having COUNT(*) >= '2000'
ORDER BY Cantidad_vendida_historica DESC;")
Productosmasvendidos
Productosvendidostotal=dbGetQuery(sqldb, 
"SELECT d.SalesOrderID, p.Name AS Nombre_producto, s.Name AS Subcategoria, c.Name AS Categoria
FROM Sales.SalesOrderDetail d INNER JOIN Production.Product p ON d.ProductID=p.ProductID
INNER JOIN Production.ProductSubcategory s ON p.ProductSubcategoryID=s.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON s.ProductCategoryID=c.ProductCategoryID")


barplot(prop.table(table(Productosvendidostotal$Categoria)),col=colors()[616], main = "Ventas por categoria de producto")

Subcategorias mas vendidas  (Road Bikes)
Cantidad_de_ventas_agrupada=dbGetQuery(sqldb, 
"SELECT c.Name AS Nombre_categoria, s.Name AS Nombre_subcategoria,  COUNT(*) Cantidad_vendida
FROM Sales.SalesOrderDetail d INNER JOIN Production.Product p ON d.ProductID=p.ProductID
INNER JOIN Production.ProductSubcategory s ON p.ProductSubcategoryID=s.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON s.ProductCategoryID=c.ProductCategoryID
GROUP BY s.Name ,c.Name 
ORDER BY Cantidad_vendida DESC;")
Cantidad_de_ventas_agrupada
library(ggplot2)
plt2 <- ggplot(Cantidad_de_ventas_agrupada) +
        geom_col(aes(Cantidad_de_ventas_agrupada$Cantidad_vendida,sort(Cantidad_de_ventas_agrupada$Nombre_subcategoria, 
        decreasing =    TRUE)), fill=colors()[616],  width = 0.5, decreasing = TRUE, col=colors()[616])+
        theme(panel.background = element_rect(fill = "white")) +
        labs(title = "Ventas por subcategoria de producto", x ="Cantidad vendida", y = "Subcategoria")
## Warning: Ignoring unknown parameters: decreasing
plt2
## Warning: Use of `Cantidad_de_ventas_agrupada$Cantidad_vendida` is discouraged.
## Use `Cantidad_vendida` instead.
## Warning: Use of `Cantidad_de_ventas_agrupada$Nombre_subcategoria` is
## discouraged. Use `Nombre_subcategoria` instead.

Total ordenes
total_ordenes=dbGetQuery(sqldb, "SELECT COUNT(*) Cantidad_ordenes_total
FROM Sales.SalesOrderHeader;")
total_ordenes
total_resellers=dbGetQuery(sqldb, "with Total as (SELECT DISTINCT(SalesPersonID)
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL)
select count(*) as Total_resellers from Total;")
total_resellers
total_ordenes_resellers=dbGetQuery(sqldb, "SELECT COUNT(*) Cantidad_ordenes_resellers
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL;")
total_ordenes_resellers
library(ggplot2)
total_ordenes_x_vendedor=dbGetQuery(sqldb, "SELECT SalesPersonID, count(SalesOrderID) as q_ordenes
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL
group by SalesPersonID
order by q_ordenes desc")
total_ordenes_x_vendedor$SalesPersonID = as.factor(total_ordenes_x_vendedor$SalesPersonID)

plt <- ggplot(total_ordenes_x_vendedor) +
        geom_col(aes(total_ordenes_x_vendedor$q_ordenes,sort(total_ordenes_x_vendedor$SalesPersonID, 
        decreasing =    TRUE)), fill=colors()[616],  width = 0.5, decreasing = TRUE, col=colors()[616])+
        theme(panel.background = element_rect(fill = "white")) +
        labs(title = "Total de ordenes por vendedor General", x ="Cantidad de ordenes", y = "VendedorID")
## Warning: Ignoring unknown parameters: decreasing
plt
## Warning: Use of `total_ordenes_x_vendedor$q_ordenes` is discouraged. Use
## `q_ordenes` instead.
## Warning: Use of `total_ordenes_x_vendedor$SalesPersonID` is discouraged. Use
## `SalesPersonID` instead.

Cantidad_ordenes_tipo_compra=dbGetQuery(sqldb, "SELECT 'fisica' AS tipo_compra, COUNT(*) Cantidad_ordenes,
FORMAT(SUM(TotalDue),'C') Monto_total
FROM Sales.SalesOrderHeader
WHERE OnlineOrderFlag = 0
GROUP BY OnlineOrderFlag
UNION 
SELECT 'online' AS tipo_compra, COUNT(*) Cantidad_ordenes, FORMAT(SUM(TotalDue),'C') Monto_tota
FROM Sales.SalesOrderHeader
WHERE OnlineOrderFlag = 1
GROUP BY OnlineOrderFlag
ORDER BY 2 DESC;")
Cantidad_ordenes_tipo_compra
Canales_de_venta=dbGetQuery(sqldb, 
"SELECT OnlineOrderFlag
FROM Sales.SalesOrderHeader;")

pie(table(Canales_de_venta), values = "%",labels=c("Ventas reseller","Ventas Online"), col=c('White',colors()[616]), main="Cantidad de ordenes por tipo de venta")
## Warning in text.default(1.1 * P$x, 1.1 * P$y, labels[i], xpd = TRUE, adj =
## ifelse(P$x < : "values" is not a graphical parameter

## Warning in text.default(1.1 * P$x, 1.1 * P$y, labels[i], xpd = TRUE, adj =
## ifelse(P$x < : "values" is not a graphical parameter
## Warning in title(main = main, ...): "values" is not a graphical parameter

EDA LineTotal en cantidades
EDA_LineTotal=dbGetQuery(sqldb, 
"SELECT 'resellers' as Tipo_compra, count(LineTotal) as cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal) as rango,
avg(LineTotal) as media,
stdev(LineTotal) as desviacion_estandar,
exp(avg(log(LineTotal))) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
WHERE OnlineOrderFlag = 0
UNION
SELECT 'ventas_online' as Tipo_compra, count(LineTotal) as cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal) as rango,
avg(LineTotal) as media,
stdev(LineTotal) as desviacion_estandar,
exp(avg(log(LineTotal))) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
WHERE OnlineOrderFlag = 1;")
EDA_LineTotal
#install.packages("ggthemes")
library(tidyverse) # tiene ggplot, dplyr, tidyr, y otros
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.4     v purrr   0.3.4
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)  # estilos de gráficos
## Warning: package 'ggthemes' was built under R version 4.1.3
library(ggrepel)   # etiquetas de texto más prolijas que las de ggplot
## Warning: package 'ggrepel' was built under R version 4.1.2
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
EDA_LineTotal_base=dbGetQuery(sqldb, 
"SELECT OnlineOrderFlag as Tipo_compra, LineTotal as cantidad, h.SalesOrderID
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
order by Tipo_compra;")

ggplot(EDA_LineTotal_base, aes(
  x = cantidad,
  group = Tipo_compra,
  fill = Tipo_compra)) +
  geom_density(alpha=0.7,adjust =2)+
  labs(x="Distribución dela cantidad", y="",
       title=" LineTotal según tipo de compra")+
  scale_x_continuous(limits = c(0,50000))+
  scale_fill_gdocs()+
  theme(legend.position = "bottom",
        plot.title      = element_text(size=12))+
  facet_wrap(~ Tipo_compra, scales = "free")

¿las cantidades son significativamente diferentes en promedio para las compras online vs. resellers?
#install.packages("nortest")
library(nortest)
ks.test(EDA_LineTotal_base$cantidad, pnorm)
## Warning in ks.test(EDA_LineTotal_base$cantidad, pnorm): ties should not be
## present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  EDA_LineTotal_base$cantidad
## D = 0.98878, p-value < 2.2e-16
## alternative hypothesis: two-sided
library(dplyr)
wilcox.test(as.numeric(EDA_LineTotal_base[c(1:60919),2]), as.numeric(EDA_LineTotal_base[c(60920:121317),2]), var.equal=TRUE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(EDA_LineTotal_base[c(1:60919), 2]) and as.numeric(EDA_LineTotal_base[c(60920:121317), 2])
## W = 2866461636, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
Las cantidades promedio de las compras online son significativamente diferentes al promedio de las compras por reseller. El promedio de cantdades compradas por reseller es mayor. 
EDA TotalDue
EDA_TotalDue=dbGetQuery(sqldb, 
"SELECT 'resellers' as Tipo_compra, count(TotalDue) as cantidad, sum(TotalDue) as Monto,
count(distinct TotalDue) as cardinalidad,
max(TotalDue) - min(TotalDue) as rango,
avg(TotalDue) as media,
stdev(TotalDue) as desviacion_estandard,
exp(avg(log(TotalDue))) as media_geometrica,
count(TotalDue) / Sum(1/TotalDue) as media_armonica
FROM Sales.SalesOrderHeader 
WHERE OnlineOrderFlag = 0
UNION
SELECT 'ventas_online' as Tipo_compra, count(TotalDue) as cantidad, sum(TotalDue) as Monto,
count(distinct TotalDue) as cardinalidad,
max(TotalDue) - min(TotalDue) as rango,
avg(TotalDue) as media,
stdev(TotalDue) as desviacion_estandard,
exp(avg(log(TotalDue))) as media_geometrica,
count(TotalDue) / Sum(1/TotalDue) as media_armonica
FROM Sales.SalesOrderHeader
WHERE OnlineOrderFlag = 1;")
EDA_TotalDue
#install.packages("ggthemes")
library(tidyverse) # tiene ggplot, dplyr, tidyr, y otros
library(ggthemes)  # estilos de gráficos
library(ggrepel)   # etiquetas de texto más prolijas que las de ggplot
library(scales)

EDA_TotalDue_base=dbGetQuery(sqldb, 
"SELECT [OnlineOrderFlag] as Tipo_compra, TotalDue as Monto, SalesOrderID
FROM Sales.SalesOrderHeader
ORDER BY Tipo_compra ;")


ggplot(EDA_TotalDue_base, aes(
  x = Monto,
  group = Tipo_compra,
  fill = Tipo_compra)) +
  geom_density(alpha=0.7,adjust =2)+
  labs(x="Distribución del Monto", y="",
       title=" Monto total según tipo tipo de compra")+
  scale_x_continuous(limits = c(0,50000))+
  scale_fill_gdocs()+
  theme(legend.position = "bottom",
        plot.title      = element_text(size=12))+
  facet_wrap(~ Tipo_compra, scales = "free")
## Warning: Removed 621 rows containing non-finite values (stat_density).

¿los montos son significativamente diferentes en promedio para las compras online vs. resellers?
#install.packages("nortest")
library(nortest)
ks.test(EDA_TotalDue_base$Monto, pnorm)
## Warning in ks.test(EDA_TotalDue_base$Monto, pnorm): ties should not be present
## for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  EDA_TotalDue_base$Monto
## D = 0.99548, p-value < 2.2e-16
## alternative hypothesis: two-sided
El monto no se distribuye normal, se utiliza test no parametrico
library(dplyr)
wilcox.test(as.numeric(EDA_TotalDue_base[c(1:3806),2]), as.numeric(EDA_TotalDue_base[c(3807:31465),2]), var.equal=TRUE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(EDA_TotalDue_base[c(1:3806), 2]) and as.numeric(EDA_TotalDue_base[c(3807:31465), 2])
## W = 89497698, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
El monto promedio de las compras online es significativamente diferente al promedio de las compras por reseller. El promedio de compras por reseller es mayor. 
Cantidad y montos de compras (online/fisica) por zonas
Cantidad_montos_por_compras_zonas=dbGetQuery(sqldb, 
"SELECT t.Name as Lugar, t.[Group] as Zona, 'resellers' as Tipo_compra, count(LineTotal) as cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal)  as rango,
avg(LineTotal) as media,
round(stdev(LineTotal), 2) as desviacion_estandard,
round(exp(avg(log(LineTotal))),2) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID 
WHERE OnlineOrderFlag = 0
GROUP BY t.Name, t.[Group] 
UNION
SELECT t.Name as Lugar, t.[Group]  as Zona, 'ventas_online' as Tipo_compra, count(LineTotal) as cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal) as rango,
avg(LineTotal) as media,
round(stdev(LineTotal), 2) as desviacion_estandard,
round(exp(avg(log(LineTotal))),2) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID 
WHERE OnlineOrderFlag = 1
GROUP BY t.Name, t.[Group] 
ORDER BY 1, 2, 3;")
Cantidad_montos_por_compras_zonas
Cantidad_montos_por_compras_Zona=dbGetQuery(sqldb, 
"SELECT t.[Group]  as Zona, 'resellers' as Tipo_compra, count(h.SalesOrderID) as cantidad_ventas,
FORMAT(sum(h.TotalDue), 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 0
GROUP BY t.[Group]
UNION
SELECT t.[Group]  as Zona, 'ventas_online' as Tipo_compra,  count(h.SalesOrderID) as cantidad_ventas,
FORMAT(sum(h.TotalDue), 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 1
GROUP BY t.[Group] 
ORDER BY Tipo_compra asc,monto_total desc;")
Cantidad_montos_por_compras_Zona
Cantidad ventas online por Zona
cantidad_ventas_online=dbGetQuery(sqldb, 
"SELECT t.[Group]  as Zona,  count(h.SalesOrderID) as cantidad_ventas,
FORMAT(sum(h.TotalDue), 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 1
GROUP BY t.[Group] 
ORDER BY 3 DESC;")
cantidad_ventas_online
Cantidad_ventas_online_z=dbGetQuery(sqldb, 
"SELECT t.[Group]  as Zona,  h.SalesOrderID as Factura,
FORMAT(h.TotalDue, 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 1
ORDER BY 3 DESC;")

barplot(prop.table(table(Cantidad_ventas_online_z$Zona)),col=c(colors()[616]), main="Cantidad ventas por zona - Online", values="%")
## Warning in plot.window(xlim, ylim, log = log, ...): "values" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "values" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "values" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "values" is not
## a graphical parameter

cantidad_ventas_resellers=dbGetQuery(sqldb, 
"SELECT t.[Group]  as Zona,  count(h.SalesOrderID) as cantidad_ventas,
FORMAT(sum(h.TotalDue), 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 0
GROUP BY t.[Group] 
ORDER BY 3 DESC;")
cantidad_ventas_resellers
Cantidad_ventas_resellers_z=dbGetQuery(sqldb, 
"SELECT t.[Group]  as Zona,  h.SalesOrderID as Factura,
FORMAT(h.TotalDue, 'C') as monto_total
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
WHERE OnlineOrderFlag = 0
ORDER BY 3 DESC;")

barplot(prop.table(table(Cantidad_ventas_resellers_z$Zona)),col=c(colors()[616]), main="Cantidad ventas por zona - Resellers")

descriptivos_ventas=dbGetQuery(sqldb, 
"SELECT t.[Group] as Zona, 'venta_fisica' as Tipo_compra, count(LineTotal) as Cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal)  as rango,
avg(LineTotal) as media,
round(stdev(LineTotal), 2) as desviacion_estandard,
round(exp(avg(log(LineTotal))),2) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID 
WHERE OnlineOrderFlag = 0
GROUP BY t.[Group] 
UNION
SELECT t.[Group]  as Zona, 'ventas_online' as Tipo_compra, count(LineTotal) as Cantidad,
count(distinct LineTotal) as cardinalidad,
max(LineTotal) - min(LineTotal) as rango,
avg(LineTotal) as media,
round(stdev(LineTotal), 2) as desviacion_estandard,
round(exp(avg(log(LineTotal))),2) as media_geometrica,
count(LineTotal) / Sum(1/LineTotal) as media_armonica
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID 
WHERE OnlineOrderFlag = 1
GROUP BY t.[Group]")
descriptivos_ventas
descriptivos_ventas_G=dbGetQuery(sqldb, 
"SELECT h.SalesOrderID,t.[Group] as Zona, OnlineOrderFlag as Tipo_compra, LineTotal as Cantidad
FROM Sales.SalesTerritory t INNER JOIN Sales.SalesOrderHeader h ON t.TerritoryID=h.TerritoryID
INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID")
descriptivos_ventas_G
g = ggplot(descriptivos_ventas_G, aes(Zona, fill=Tipo_compra) ) +
  labs(title = "Compras por zona y tipo de compra")+ylab("") +
  theme(plot.title = element_text(size = rel(1), colour = "black") )+
theme(panel.background = element_rect(fill = "white"))
g+geom_bar(position="dodge") + scale_fill_manual(values = alpha(c(colors()[616], colors()[615]), 1)) +
  theme(axis.title.x = element_text(face="bold", size=3))

Características Resellers que mas ventas tienen
Total_vendedores=dbGetQuery(sqldb, 
"SELECT *
FROM HumanResources.vEmployeeDepartmentHistory 
WHERE Department = 'Sales' and EndDate is null;")
Total_vendedores
Datos_vendedores2=dbGetQuery(sqldb,
"SELECT *
FROM HumanResources.vEmployee
WHERE BusinessEntityID IN (SELECT BusinessEntityID
                            FROM HumanResources.vEmployeeDepartmentHistory 
                            WHERE Department = 'Sales')")
Datos_vendedores2
Datos_vendedores2$Lugar=Datos_vendedores2$CountryRegionName
Ubicacion Vendedores por Region
library(lessR)
## Warning: package 'lessR' was built under R version 4.1.3
## 
## lessR 4.2.0                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")   Read text, Excel, SPSS, SAS, or R data file
##   d is default data frame, data= in analysis routines optional
## 
## Learn about reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables.
##   Enter:  browseVignettes("lessR")
## 
## View changes in this or recent versions of lessR.
##   Enter: help(package=lessR)  Click: Package NEWS
##   Enter: interact()  for access to interactive graphics
##   New function: reshape_long() to move data from wide to long
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:scales':
## 
##     rescale
## The following objects are masked from 'package:dplyr':
## 
##     recode, rename
PieChart(Lugar, hole = 0, values = "%", data = Datos_vendedores2)

## >>> Suggestions
## PieChart(Lugar, hole=0)  # traditional pie chart
## PieChart(Lugar, values="%")  # display %'s on the chart
## PieChart(Lugar)  # bar chart
## Plot(Lugar)  # bubble plot
## Plot(Lugar, values="count")  # lollipop plot 
## 
## --- Lugar --- 
## 
##          Lugar Count   Prop 
## --------------------------- 
##      Australia    1   0.056 
##         Canada    2   0.111 
##         France    1   0.056 
##        Germany    1   0.056 
## United Kingdom    1   0.056 
##  United States   12   0.667 
## --------------------------- 
##          Total   18   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 32.667, df = 5, p-value = 0.000 
## >>> Low cell expected frequencies, so chi-squared approximation may not be accurate 
## 
library(lessR)
PieChart(CountryRegionName, hole = 0, values = "%", data = Datos_vendedores2)

## >>> Suggestions
## PieChart(CountryRegionName, hole=0)  # traditional pie chart
## PieChart(CountryRegionName, values="%")  # display %'s on the chart
## PieChart(CountryRegionName)  # bar chart
## Plot(CountryRegionName)  # bubble plot
## Plot(CountryRegionName, values="count")  # lollipop plot 
## 
## --- CountryRegionName --- 
## 
##  CountryRegnNm Count   Prop 
## --------------------------- 
##      Australia    1   0.056 
##         Canada    2   0.111 
##         France    1   0.056 
##        Germany    1   0.056 
## United Kingdom    1   0.056 
##  United States   12   0.667 
## --------------------------- 
##          Total   18   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 32.667, df = 5, p-value = 0.000 
## >>> Low cell expected frequencies, so chi-squared approximation may not be accurate 
## 
library(lessR)
PieChart(StateProvinceName, hole = 0, values = "%", data = Datos_vendedores2)

## >>> Suggestions
## PieChart(StateProvinceName, hole=0)  # traditional pie chart
## PieChart(StateProvinceName, values="%")  # display %'s on the chart
## PieChart(StateProvinceName)  # bar chart
## Plot(StateProvinceName)  # bubble plot
## Plot(StateProvinceName, values="count")  # lollipop plot 
## 
## --- StateProvinceName --- 
## 
## StateProvncNm Count   Prop 
## -------------------------- 
##       Alberta    1   0.056 
##    California    1   0.056 
##       England    1   0.056 
##       Gironde    1   0.056 
##       Hamburg    1   0.056 
## Massachusetts    1   0.056 
##      Michigan    1   0.056 
##     Minnesota    1   0.056 
##       Ontario    1   0.056 
##        Oregon    1   0.056 
##     Tennessee    1   0.056 
##          Utah    1   0.056 
##      Victoria    1   0.056 
##    Washington    5   0.278 
## -------------------------- 
##         Total   18   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 11.556, df = 13, p-value = 0.564 
## >>> Low cell expected frequencies, so chi-squared approximation may not be accurate 
## 
Nivel organizacional y rol:
Nivel_organizacional=dbGetQuery(sqldb,
"SELECT DISTINCT OrganizationLevel, JobTitle
FROM HumanResources.Employee
WHERE BusinessEntityID IN (SELECT BusinessEntityID
                            FROM HumanResources.vEmployeeDepartmentHistory 
                            WHERE Department = 'Sales');")

Nivel_organizacional
Todos los vendedores que venden, todas las ventas online no presentan vendendor, solo las fisicas.
Vendedor_que_vendieron=dbGetQuery(sqldb,
"SELECT DISTINCT(SalesPersonID) 
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL;")
Vendedor_que_vendieron
Dentro de los vendedores se encuentran 3 managers 3 North American Sales Manager, European Sales Manager, Pacific Sales Manage y tienen ventas
Suma_rol=dbGetQuery(sqldb, 
"SELECT OrganizationLevel, JobTitle, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
GROUP BY OrganizationLevel, JobTitle
ORDER BY Cantidad_ordenes DESC;")
Suma_rol
library(lessR)
Suma_rol2=dbGetQuery(sqldb, 
"SELECT JobTitle
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL")

PieChart(JobTitle, hole = 0, values = "%", data = Suma_rol2, legend = c("Sales Representative","North American Sales Manager", "European Sales Manager", "Pacific Sales Manager"), fill =  c("pink","blue","aquamarine1",colors()[616]))

## >>> Suggestions
## PieChart(JobTitle, hole=0)  # traditional pie chart
## PieChart(JobTitle, values="%")  # display %'s on the chart
## PieChart(JobTitle)  # bar chart
## Plot(JobTitle)  # bubble plot
## Plot(JobTitle, values="count")  # lollipop plot 
## 
## --- JobTitle --- 
## 
##             JobTitle  Count   Prop 
## ----------------------------------- 
## EuropeanSalesManager     39   0.010 
## NorthAmericanSlsMngr     48   0.013 
##  PacificSalesManager     16   0.004 
## Sales Representative   3703   0.973 
## ----------------------------------- 
##                Total   3806   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 10609.439, df = 3, p-value = 0.000
TOP 10 mayor cantidad de ventas por reseller, año y mes
Top_10=dbGetQuery(sqldb, 
"SELECT TOP 10 SalesPersonID,  OrganizationLevel, JobTitle, YEAR(OrderDate) Año, MONTH(OrderDate) Mes, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
GROUP BY SalesPersonID,  OrganizationLevel, JobTitle, YEAR(OrderDate), MONTH(OrderDate) 
ORDER BY Cantidad_ordenes DESC;")
Top_10
Cantidad de ventas por Managers, año y mes
Ventas_manager_anual=dbGetQuery(sqldb, 
"SELECT SalesPersonID,  OrganizationLevel, JobTitle, YEAR(OrderDate) Año, MONTH(OrderDate) Mes, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
AND JobTitle != 'Sales Representative'
GROUP BY SalesPersonID,  OrganizationLevel, JobTitle, YEAR(OrderDate), MONTH(OrderDate) 
ORDER BY Cantidad_ordenes DESC;")
Ventas_manager_anual
Ventas North American Sales
Ventas_North_Sales_Manager=dbGetQuery(sqldb, 
"SELECT YEAR(OrderDate) AS Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
AND JobTitle = 'North American Sales Manager'
GROUP BY YEAR(OrderDate)
ORDER BY Año; ")
Ventas_North_Sales_Manager
Ventas European Sales Manager
Ventas_European_Sales=dbGetQuery(sqldb,
"SELECT YEAR(OrderDate) AS Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
AND JobTitle = 'European Sales Manager'
GROUP BY YEAR(OrderDate)
ORDER BY Año; ")
Ventas_European_Sales
Ventas Pacific Sales Manager
Ventas_Pacific_Sales=dbGetQuery(sqldb,
"SELECT YEAR(OrderDate) AS Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
AND JobTitle = 'Pacific Sales Manager'
GROUP BY YEAR(OrderDate)
ORDER BY Año; ")
Ventas_Pacific_Sales
TOP 5 resellers con mayores montos vendidos
Top_5_Sales=dbGetQuery(sqldb,
"SELECT TOP 5 SalesPersonID, OrganizationLevel, JobTitle, FORMAT(SUM(TotalDue), 'C') Monto_total
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID
WHERE SalesPersonID IS NOT NULL
GROUP BY SalesPersonID, OrganizationLevel, JobTitle
ORDER BY Monto_total DESC;")
Top_5_Sales
Cantidad ordenes por lugar
Ordenes_por_lugar=dbGetQuery(sqldb,
"SELECT SalesPersonID, t.Name AS Zona, t.[Group] AS Lugar
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesPerson p ON h.SalesPersonID=p.BusinessEntityID 
INNER JOIN Sales.SalesTerritory t ON p.TerritoryID=t.TerritoryID
GROUP BY SalesPersonID, t.Name, t.[Group]
HAVING COUNT(SalesOrderID) IN (SELECT COUNT(*) Cantidad_ordenes
                                    FROM Sales.SalesOrderHeader h1
                                    WHERE SalesPersonID IS NOT NULL
                                    GROUP BY SalesPersonID
                                    HAVING COUNT(*)>250)
ORDER BY COUNT(SalesOrderID) DESC;")
Ordenes_por_lugar
De dónde son los resellers que realizaron más de 250 ventas totales
Datos lugar y telefono de todos los resellers (vista)
Info_vendedores=dbGetQuery(sqldb,
"SELECT *
FROM sales.vSalesPerson;")
Info_vendedores
DATAFRAME con características de Resellers
DATAFRAME_Resellers=dbGetQuery(sqldb,
"WITH Resellers AS
(
SELECT SalesPersonID, CONCAT(pp.FirstName, ' ', pp.LastName) as Reseller, t.Name as Zona, t.[Group] as Lugar, e.JobTitle, v.Department, v.GroupName, e.BirthDate, e.MaritalStatus, e.Gender, e.HireDate, v.StartDate
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID 
INNER JOIN Person.Person pp ON e.BusinessEntityID=pp.BusinessEntityID
INNER JOIN HumanResources.vEmployeeDepartment v ON pp.BusinessEntityID=v.BusinessEntityID
INNER JOIN Sales.SalesPerson p ON v.BusinessEntityID =p.BusinessEntityID 
INNER JOIN Sales.SalesTerritory t ON p.TerritoryID=t.TerritoryID
GROUP BY SalesPersonID, CONCAT(pp.FirstName, ' ', pp.LastName), t.Name, t.[Group] , e.JobTitle, v.Department, v.GroupName, 
e.BirthDate, e.MaritalStatus, e.Gender, e.HireDate, v.StartDate)

SELECT*, DATEDIFF(D, StartDate, HireDate) as Datediff, DATEDIFF (YEAR, BirthDate, HireDate) as Edad
FROM Resellers;")
DATAFRAME_Resellers
hist(x = DATAFRAME_Resellers$Edad, main ="Distribución Edad", xlab = "Densidad", col=colors()[616])

Segmentado por género
Segmentacion_genero=dbGetQuery(sqldb,
"SELECT E.Gender, COUNT(*) Casos
FROM [HumanResources].[Employee] E
LEFT JOIN [Person].[Person] P ON E.BusinessEntityID=P.[BusinessEntityID]
WHERE P.[PersonType]='SP'
GROUP BY E.Gender")
Segmentacion_genero
Segmentacion_genero_tp=dbGetQuery(sqldb,
"SELECT P.PersonType, E.Gender, COUNT(*) Casos
FROM [HumanResources].[Employee] E
LEFT JOIN [Person].[Person] P ON E.BusinessEntityID=P.[BusinessEntityID]
GROUP BY P.[PersonType],E.Gender
order by PersonType")
Segmentacion_genero_tp
Segmentacion edad vendedor
Segmentacion_edad=dbGetQuery(sqldb,
"SELECT DATEDIFF (YEAR, e.BirthDate,e.HireDate) as Edad, COUNT(*) conteo
FROM [HumanResources].[Employee] e
LEFT JOIN [Person].[Person] P ON E.BusinessEntityID=P.[BusinessEntityID] and p.[PersonType]='SL'
GROUP BY DATEDIFF (YEAR, e.BirthDate, e.HireDate), p.[PersonType];")
Segmentacion_edad
#install.packages("ggthemes")
library(tidyverse) # tiene ggplot, dplyr, tidyr, y otros
library(ggthemes)  # estilos de gráficos
library(ggrepel)   # etiquetas de texto más prolijas que las de ggplot
library(scales)

Segmentacion_edad_detalle=dbGetQuery(sqldb,
"SELECT  p.[PersonType], DATEDIFF (YEAR, e.BirthDate, e.HireDate) as Edad
FROM [HumanResources].[Employee] e
LEFT JOIN [Person].[Person] P ON E.BusinessEntityID=P.[BusinessEntityID] and p.[PersonType]='SL' 
GROUP BY DATEDIFF (YEAR, e.BirthDate, e.HireDate), p.[PersonType];")
#Segmentacion_edad
Segmentacion_edad_detalle
hist(x = Segmentacion_edad_detalle$Edad, main ="Distribución Edad", xlab = "Densidad", col=colors()[616])

Segmentado por lugar
Segmentacion_lugar=dbGetQuery(sqldb,
"SELECT Lugar, COUNT(*) Resellers
FROM (SELECT SalesPersonID, CONCAT(pp.FirstName, ' ', pp.LastName) as Reseller, t.Name as Zona, t.[Group] as Lugar, e.JobTitle, v.Department, v.GroupName, e.BirthDate, e.MaritalStatus, e.Gender, e.HireDate, v.StartDate
FROM Sales.SalesOrderHeader h INNER JOIN HumanResources.Employee e ON h.SalesPersonID=e.BusinessEntityID 
INNER JOIN Person.Person pp ON e.BusinessEntityID=pp.BusinessEntityID
INNER JOIN HumanResources.vEmployeeDepartment v ON pp.BusinessEntityID=v.BusinessEntityID
INNER JOIN Sales.SalesPerson p ON v.BusinessEntityID =p.BusinessEntityID 
INNER JOIN Sales.SalesTerritory t ON p.TerritoryID=t.TerritoryID
GROUP BY SalesPersonID, CONCAT(pp.FirstName, ' ', pp.LastName), t.Name, t.[Group] , e.JobTitle, v.Department, v.GroupName, 
e.BirthDate, e.MaritalStatus, e.Gender, e.HireDate, v.StartDate)
Resellers
GROUP BY Lugar; ")
Segmentacion_lugar
DATAFRAME con características de consumos de productos
DATAFRAME_productos=dbGetQuery(sqldb,
"SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
--WHERE OnlineOrderFlag=1 para filtrar por tipo_compra
ORDER BY CustomerID;")
DATAFRAME_productos
No hay compras onlines y físicas por el mismo cliente:
compras_clientes=dbGetQuery(sqldb,
"WITH compras_clientes AS
(
SELECT CustomerID, COUNT(OnlineOrderFlag) cantidad_compras, SUM(TotalDue) monto_compras, 'física' as tipo_compra
FROM Sales.SalesOrderHeader
WHERE OnlineOrderFlag = 0
GROUP BY CustomerID
UNION
SELECT CustomerID, COUNT(OnlineOrderFlag) cantidad_compras, SUM(TotalDue) monto_compras, 'online' as tipo_compra
FROM Sales.SalesOrderHeader
WHERE OnlineOrderFlag = 1
GROUP BY CustomerID
--ORDER BY 1
)
 
SELECT *, DENSE_RANK() OVER (ORDER BY CustomerID) control_cliente_duplicado
FROM compras_clientes;")
compras_clientes
EDA productos
compras_productos=dbGetQuery(sqldb,
"WITH Productos AS
(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
WHERE OnlineOrderFlag=0)

SELECT MAX(UnitPrice) producto_mas_caro, MIN(UnitPrice) producto_menos_caro, AVG(UnitPrice) media_precio,
MAX(Productos_x_anio) maxima_cantidad_x_anio, MAX(Gastos_x_anio) maximo_gasto_x_año, MAX(Total_productos) maxima_cantidad_total, 
MAX(Total_gastos) gasto_maximo_total, MIN(Total_gastos) gasto_minimo_total
FROM Productos;")

compras_productos
Producto más caro comprado y frecuencia
compras_productos=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
WHERE OnlineOrderFlag=1 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT ProductID, Producto, Subcategoria, Categoria, UnitPrice, COUNT(ProductID) Frecuencia
FROM Productos 
WHERE UnitPrice = (SELECT MAX(p.UnitPrice)
                    FROM Productos p)
GROUP BY ProductID, Producto, Subcategoria, Categoria, UnitPrice
HAVING COUNT(ProductID) >= ALL (SELECT COUNT(pr.ProductID)
                                FROM Productos pr
                                WHERE pr.UnitPrice = (SELECT MAX(pr1.UnitPrice)
                                                        FROM Productos pr1)
                                GROUP BY pr.ProductID);")
compras_productos
Producto menos caro y frecuencia
compra_producto_barato=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
WHERE OnlineOrderFlag=0 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT ProductID, Producto, Subcategoria, Categoria, UnitPrice, COUNT(ProductID) Frecuencia
FROM Productos
WHERE UnitPrice = (SELECT MIN(p.UnitPrice)
                    FROM Productos p)
GROUP BY ProductID, Producto, Subcategoria, Categoria, UnitPrice;")
compra_producto_barato
Productos más frecuentes 
productos_frecuentes=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
--WHERE OnlineOrderFlag=0 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT ProductID, Producto, Subcategoria, Categoria, UnitPrice, COUNT(ProductID) Frecuencia, OnlineOrderFlag
FROM Productos 
GROUP BY ProductID, Producto, Subcategoria, Categoria, UnitPrice, OnlineOrderFlag
ORDER BY Frecuencia DESC;")
productos_frecuentes
productos_menos_frecuentes
Productos_menos_frecuentes =dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
WHERE OnlineOrderFlag=0 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT ProductID, Producto, Subcategoria, Categoria, UnitPrice, COUNT(ProductID) Frecuencia
FROM Productos 
GROUP BY ProductID, Producto, Subcategoria, Categoria, UnitPrice
ORDER BY Frecuencia;")
Productos_menos_frecuentes
Cliente que más compró
Cliente_mayor_compra=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
--WHERE OnlineOrderFlag=1 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT DISTINCT CustomerID, t.[Group] AS Lugar, OnlineOrderFlag, SalesPersonID, d.JobTitle
FROM HumanResources.vEmployeeDepartment d INNER JOIN Productos p ON d.BusinessEntityID=p.SalesPersonID
INNER JOIN Sales.SalesTerritory t ON p.TerritoryID=t.TerritoryID
WHERE Total_productos = (SELECT MAX(p1.Total_productos)
                            FROM Productos p1);")

Cliente_mayor_compra
Cliente que más compró
Cliente_max_compra=dbGetQuery(sqldb,
"SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID")
Cliente_max_compra
##WHERE CustomerID=29722 AND SalesPersonID=274;
Cliente que más gastó
Cliente_mas_gasto=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
--WHERE OnlineOrderFlag=1 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT DISTINCT CustomerID, t.[Group] AS Lugar, OnlineOrderFlag, SalesPersonID, d.JobTitle
FROM HumanResources.vEmployeeDepartment d INNER JOIN Productos p ON d.BusinessEntityID=p.SalesPersonID
INNER JOIN Sales.SalesTerritory t ON p.TerritoryID=t.TerritoryID
WHERE Total_gastos = (SELECT MAX(p1.Total_gastos)
                            FROM Productos p1);")
Cliente_mas_gasto
Cliente_con_mas_gasto=dbGetQuery(sqldb,
"SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
WHERE CustomerID=29818 AND SalesPersonID=274;")
Cliente_con_mas_gasto
Consumos de categorías más frecuentes por tipo de compra
Cliente_categorias_frecuentes=dbGetQuery(sqldb,
"WITH Productos AS

(
SELECT CustomerID, OnlineOrderFlag, SalesPersonID, TerritoryID, YEAR(OrderDate) AS Anio, MONTH(OrderDate) AS Mes, d.ProductID, p.Name AS Producto,
c.Name AS Categoria, ps.Name AS Subcategoria, UnitPrice, COUNT(d.ProductID) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Productos_x_anio,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID, YEAR(OrderDate) ) Gastos_x_anio,
COUNT(d.ProductID) OVER (PARTITION BY CustomerID) Total_productos,
SUM(d.LineTotal) OVER (PARTITION BY CustomerID) Total_gastos
FROM Sales.SalesOrderHeader h INNER JOIN Sales.SalesOrderDetail d ON h.SalesOrderID=d.SalesOrderID
INNER JOIN Production.Product p ON d.ProductID=p.ProductID 
INNER JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID=ps.ProductSubcategoryID
INNER JOIN Production.ProductCategory c ON ps.ProductCategoryID=c.ProductCategoryID
--WHERE OnlineOrderFlag=1 -- para filtrar por tipo_compra
--ORDER BY CustomerID;
)

SELECT Categoria, OnlineOrderFlag, COUNT(*) Consumos
FROM Productos
GROUP BY Categoria, OnlineOrderFlag
ORDER BY Categoria, OnlineOrderFlag;")
Cliente_categorias_frecuentes
Ordenes por año
Ordenes_compra =dbGetQuery(sqldb, 
"SELECT YEAR(OrderDate) Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader
GROUP BY YEAR(OrderDate)
ORDER BY Año;")
Ordenes_compra
Ordenes_compra=dbGetQuery(sqldb, 
"with tabla as (
select [SalesOrderID] , [TotalDue],
case
when month([OrderDate])<10 then cast(concat(cast(year([OrderDate]) as varchar),'0', cast(month([OrderDate]) as varchar)) as int)
else cast(concat(cast(year([OrderDate]) as varchar), cast(month([OrderDate]) as varchar)) as int) 
end periodo_orden_compra
from [Sales].[SalesOrderHeader]
) 
Select periodo_orden_compra, count([SalesOrderID]) as q_ordenes , sum([TotalDue]) as total_facturas
from tabla group by periodo_orden_compra
order by periodo_orden_compra asc ;")
Ordenes_compra
año <- c(201105:201406)
set.seed(1234)
Total_venta <- Ordenes_compra$total_facturas
datos <- data.frame(cbind(año, Total_venta))

library(ggplot2)
ggplot(datos, aes(x=año, y=Total_venta )) + 
  geom_line(colour="red")  + 
  geom_point( size=1, shape=21, fill="white", colour="red") + 
  theme_minimal()

library(astsa)
library(TSA)
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:lessR':
## 
##     kurtosis
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
adf.test(Ordenes_compra$total_facturas, k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Ordenes_compra$total_facturas
## Dickey-Fuller = -8.6997, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
library(forecast)
## Registered S3 methods overwritten by 'forecast':
##   method       from
##   fitted.Arima TSA 
##   plot.Arima   TSA
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
## 
##     gas
fit_general <- auto.arima(Ordenes_compra$total_facturas)
summary(fit_general)
## Series: Ordenes_compra$total_facturas 
## ARIMA(1,1,1) 
## 
## Coefficients:
##           ar1      ma1
##       -0.6111  -0.5719
## s.e.   0.1619   0.1685
## 
## sigma^2 = 2463087529433:  log likelihood = -580.05
## AIC=1166.11   AICc=1166.84   BIC=1170.94
## 
## Training set error measures:
##                    ME    RMSE     MAE       MPE     MAPE      MASE        ACF1
## Training set 274158.8 1506198 1088084 -170.5262 203.0469 0.5670397 -0.08807366
plot(forecast(fit_general,h=6))

Compras fisicas por año
Ordenes_compra_fisica1=dbGetQuery(sqldb, 
"SELECT YEAR(OrderDate) Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL
GROUP BY YEAR(OrderDate)
ORDER BY Año;")
Ordenes_compra_fisica1
Ordenes_compra_fisica <- dbGetQuery(sqldb,
"with tabla as (
select [SalesOrderID] , [TotalDue],[OnlineOrderFlag],
case
when month([OrderDate])<10 then cast(concat(cast(year([OrderDate]) as varchar),'0', cast(month([OrderDate]) as varchar)) as int)
else cast(concat(cast(year([OrderDate]) as varchar), cast(month([OrderDate]) as varchar)) as int) 
end periodo_orden_compra
from [Sales].[SalesOrderHeader]
) 
Select periodo_orden_compra, count([SalesOrderID]) as q_ordenes , sum([TotalDue]) as total_facturas_compra_fisica
from tabla where OnlineOrderFlag = 0 
group by periodo_orden_compra
order by periodo_orden_compra asc ;")
Ordenes_compra_fisica
año <- c(201105:201406)
set.seed(1234)
Ventas_fisicas <- Ordenes_compra_fisica$total_facturas_compra_fisica
datos <- data.frame(cbind(año, Ventas_fisicas))

library(ggplot2)
ggplot(datos, aes(x=año, y=Ventas_fisicas )) + 
  geom_line(colour="blue")  + 
  geom_point( size=1, shape=21, fill="white", colour="blue") + 
  theme_minimal()

adf.test(Ordenes_compra_fisica$total_facturas_compra_fisica, k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Ordenes_compra_fisica$total_facturas_compra_fisica
## Dickey-Fuller = -8.7225, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Pronostico compras fisicas
library(forecast)
fit_fisica <- auto.arima(Ordenes_compra_fisica$total_facturas_compra_fisica)
summary(fit_fisica)
## Series: Ordenes_compra_fisica$total_facturas_compra_fisica 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##           ar1       mean
##       -0.3871  2677698.9
## s.e.   0.1627   160449.4
## 
## sigma^2 = 1759481821506:  log likelihood = -526.63
## AIC=1059.25   AICc=1060.05   BIC=1063.83
## 
## Training set error measures:
##                     ME    RMSE     MAE       MPE     MAPE     MASE       ACF1
## Training set -19336.97 1286850 1048367 -4760.707 4785.396 0.595436 0.08058387
plot(forecast(fit_fisica,h=6))

Compras online por año
Ordenes_compra_fisica1=dbGetQuery(sqldb, 
"SELECT YEAR(OrderDate) Año, COUNT(*) Cantidad_ordenes
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NULL
GROUP BY YEAR(OrderDate)
ORDER BY Año;")
Ordenes_compra_fisica1
Ordenes_compra_online=dbGetQuery(sqldb, 
"with tabla as (
select [SalesOrderID] , [TotalDue],[OnlineOrderFlag],
case
when month([OrderDate])<10 then cast(concat(cast(year([OrderDate]) as varchar),'0', cast(month([OrderDate]) as varchar)) as int)
else cast(concat(cast(year([OrderDate]) as varchar), cast(month([OrderDate]) as varchar)) as int) 
end periodo_orden_compra
from [Sales].[SalesOrderHeader]
) 
Select periodo_orden_compra, count([SalesOrderID]) as q_ordenes , sum([TotalDue]) as total_facturas_compras_online
from tabla where OnlineOrderFlag = 1 
group by periodo_orden_compra
order by periodo_orden_compra asc");
Ordenes_compra_online
año <- c(201105:201406)
set.seed(1234)
Ventas_online <- Ordenes_compra_online$total_facturas_compras_online
datos <- data.frame(cbind(año, Ventas_online))

library(ggplot2)
ggplot(datos, aes(x=año, y=Ventas_online)) + 
  geom_line(colour="blue")  + 
  geom_point( size=1, shape=21, fill="white", colour="blue") + 
  theme_minimal()

adf.test(diff(Ordenes_compra_online$total_facturas_compras_online), k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(Ordenes_compra_online$total_facturas_compras_online)
## Dickey-Fuller = -4.0166, Lag order = 0, p-value = 0.01996
## alternative hypothesis: stationary
Pronostico compras online
fit_online <- auto.arima(Ordenes_compra_online$total_facturas_compras_online)
summary(fit_online)
## Series: Ordenes_compra_online$total_facturas_compras_online 
## ARIMA(0,1,0) 
## 
## sigma^2 = 153405832629:  log likelihood = -528.99
## AIC=1059.99   AICc=1060.1   BIC=1061.6
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 1004.474 386482.6 188929.9 -97.32796 118.1753 0.9736864 -0.1084915
plot(forecast(fit_online,h=6))